import os 
import logging

# Database file names.
#
# The comment above each name records the stage that writes the file,
# the storage backend, and (where known) the record layout as
# "key | value".

# pre-processing, binary storage: src id | list of dest id
db_webgraph = 'db.webgraph'

# pre-processing, binary storage
db_sitegraph = 'db.sitegraph'

# pre-processing, bsddb: url | id
db_urlid = 'db.urlid'

# pre-processing, bsddb: id | url
db_idurl = 'db.idurl'

# pre-processing, bsddb: id | title text
db_title = 'db.title'

# post-processing, binary storage: dest id | list of src id
db_transposegraph = 'db.transposegraph'

# stage and backend are not recorded for this file
db_stat = 'db.stat'

# PATH
#
# dirProject is the parent of the current working directory.
# os.path.dirname is used instead of splitting on '/' by hand: the manual
# split produced an empty string when the working directory sat directly
# under the root (or when the platform separator is not '/'), which
# silently turned dirInput into a path relative to the working directory.
dirProject = os.path.dirname(os.getcwd())

# dirInput is dirProject joined with one of the candidate data sets below;
# change the index after the tuple to switch data sets.  Index 3 selects
# 'data/big/'.
dirInput = os.path.join(
    dirProject,
    (
        '',
        'data/wbCt100Sep08/',
        'data/th-test/',
        'data/big/',
        'data/crawl-2009-10/',
    )[3]
)

# Marker string used as the document separator.
docSeperator = '==P=>>>>=i===<<<<=T===>=A===<=!Junghoo!==>'

# Content types treated as valid.
validContentTypes = (
    'text/html',
    'text',
    'text/plain',
    'application/xhtml+xml',
)

# NOTE(review): the original comment here read "maximum tar .gz";
# presumably this is an upper bound on a host count -- confirm against
# the code that reads it.
maximumHost = 100000

# WARNING: import-time side effect.  This shells out to `rm db.* log`,
# which is resolved against the current working directory, so any file
# matching `db.*` (every database file name defined in this module fits
# that pattern) and the file `log` are deleted each time this module is
# loaded.
# NOTE(review): presumably intended to start every run from a clean
# state -- confirm that all importers of this module expect it.
os.system('rm db.* log')

